#!/bin/sh
#
# Conv-host for IBM SP:
#  Translates +pN-style charmrun options into 
# POE (Parallel Operating Environment) options.

# Parse the charmrun command line.
#
# Options recognised here are consumed and stored in shell variables:
#   +p N    / +pN     -> pes       (number of processors, default 1)
#   +ppn N  / +ppnN   -> ppnused   (also forwarded to the program via args)
#   +n N              -> nodes
#   +ln N             -> logical
#   +ns N             -> nodesize
#   +ll FILE          -> llfile
#   +name NAME        -> jobname
# Every other word is appended to args, each followed by a single space, so
# the first unrecognised word ends up being the executable.
args=""
pes=1

while [ $# -gt 0 ]; do
  # Set to "yes" by options whose value is the following word.
  has_value=no
  case "$1" in
    +p)
      pes=${2-}
      has_value=yes
      ;;
    +ppn)
      ppnused=${2-}
      args="${args}$1 ${2-} "
      has_value=yes
      ;;
    +p[0-9]*)
      # Parameter expansion instead of "expr substr": POSIX, no extra
      # process, and no 10-character limit on the value.
      pes=${1#+p}
      ;;
    +ppn[0-9]*)
      ppnused=${1#+ppn}
      args="${args}+ppn $ppnused "
      ;;
    +n)
      nodes=${2-}
      has_value=yes
      ;;
    +ln)
      logical=${2-}
      has_value=yes
      ;;
    +ns)
      nodesize=${2-}
      has_value=yes
      ;;
    +ll)
      llfile=${2-}
      has_value=yes
      ;;
    +name)
      jobname=${2-}
      has_value=yes
      ;;
    *)
      args="${args}$1 "
      ;;
  esac
  shift
  # Also skip the option's value, unless the option was the last word on the
  # command line (shifting past the end is an error in strict sh).
  if [ "$has_value" = yes ] && [ $# -gt 0 ]; then
    shift
  fi
done

# Make sure a program to run was given: after option parsing, args must
# start with the executable, i.e. it may not be empty or start with another
# "-" or "+" option. A "case" pattern is used because "==" inside "[" is not
# part of POSIX sh.
case "$args" in
  "" | -* | +*)
    # print usage help
    echo "help"
    exit
    ;;
esac

# Split args into the executable (first word) and the program arguments
# (remaining words, joined by single spaces). The split is done with
# globbing switched off and without "echo", so that words such as "*" or a
# leading "-n" reach the program unchanged. The positional parameters are
# free to reuse here: option parsing has already consumed them.
set -f
set -- $args
set +f
executable=${1-}
if [ $# -gt 0 ]; then
  shift
fi
args="$*"

# check if the node size has been set, or a file called .nodesize is present
#if [ -z "$nodesize" ]
#    then
#    if [ -f ".nodesize" ]
#	then
#	nodesize=`cat .nodesize`
#    elif [ -f "$HOME/.nodesize" ]
#	then
#	nodesize=`cat $HOME/.nodesize`
#    fi
#fi

# check that nodesize is a positive integer
#if [ -n "$nodesize" -a "$nodesize" -gt 0 ]
#    then
#    echo
#    else
#    echo
#    echo "No node size specified. Either use flag \"+ns <size>\" or create a file called"
#    echo "\".nodesize\" in the current directory or in your home directory."
#    echo
#    exit
#fi

# Locate the LoadLeveler template. A value given with +ll wins; otherwise
# the first existing file of ./charmrun.ll and $HOME/charmrun.ll is used.
if [ -z "$llfile" ]; then
    for candidate in "charmrun.ll" "$HOME/charmrun.ll"; do
        if [ -f "$candidate" ]; then
            llfile=$candidate
            break
        fi
    done
fi

# Without a template no job file can be generated: explain and stop.
if [ -z "$llfile" ]; then
    cat <<'EOF'
No load leveler template specified. Either use the flag "+ll <llfile>" or create
a file called "charmrun.ll" in the current directory on in your home directory.
An example of the template can be found at src/arch/lapi/charmrun.ll, this may
require tuning for the specific machine (for example account_no or class).

EOF
    exit
fi

# SMP runs only (+ppn given and greater than zero): export MALLOCMULTIHEAP.
# Splitting the heap into several parts lets each thread allocate without
# taking a lock, which improves allocation performance but also increases
# memory usage - hence it is not enabled for non-SMP runs.
# The heap count is +ppn plus one, capped at 32.
if [ -n "$ppnused" ] && [ $ppnused -gt 0 ]; then
    heaps=$(expr $ppnused + 1)
    test $heaps -gt 32 && heaps=32
    MALLOCMULTIHEAP=heaps:$heaps
    export MALLOCMULTIHEAP
fi

# Name of the generated LoadLeveler job command file; the process id keeps
# concurrent charmrun invocations apart.
filename="charmrun_script.$$.ll"

# Count the template lines that mention "queue". Zero means the job is run
# directly through poe; anything else means the generated file is handed to
# llsubmit. (Despite its name, a non-zero "interactive" selects batch mode.)
# The template path is quoted so that paths containing whitespace work, and
# "grep -c" yields a bare number without the padding some "wc -l" add.
interactive=$(grep -c queue "$llfile")
if [ -z "$interactive" ]; then
    # grep printed no count (for example the template could not be read).
    interactive=0
fi

if [ "$interactive" -gt 1 ]; then
    # Only a warning: processing continues in batch mode.
    echo "Multiple queueing not supported!"
fi

if [ "$interactive" -eq 0 ]; then
    node_usage="shared"
else
    node_usage="not_shared"
fi

# Turn pes, +ppn, +ln, +ns and +n into the LoadLeveler geometry lines that
# are later written to the job command file: on exit, nodes, tasks_per_node
# and total_tasks each hold either a complete "#@ ..." directive or nothing.
if [ -z "$ppnused" ]; then
    # No +ppn: one task per processor.
    total_tasks=$pes
    if test -z "$nodes"; then
      # No +n either: derive the node count from the processor count that
      # prtconf reports for this machine (4 if it cannot be determined),
      # rounding up.
      cpus=$(prtconf 2>/dev/null | grep 'Number Of Processors:' | sed -e 's/^[^0-9]*//')
      test x$cpus = x && cpus=4
      if [ $(expr $pes / $cpus '*' $cpus) -eq $pes ]; then
        nodes=$(expr $pes / $cpus)
      else
        nodes=$(expr $pes / $cpus + 1)
      fi
    fi
    nodes="#@ node = $nodes"
    total_tasks="#@ total_tasks = $total_tasks"
else
    # +ppn given: each task drives ppnused processors, so pes must divide
    # evenly into tasks.
    total_tasks=$(expr $pes / $ppnused)
    if [ $pes -ne $(expr $total_tasks \* $ppnused) ]; then
        printf '\n%s\n\n' "number of processors must be a multiple of number of processors per node!"
        exit
    fi

    # Tasks per node: +ns / +ppn when +ns is given, else +ln, else 1.
    if [ -n "$nodesize" ]; then
        tasks_per_node=$(expr $nodesize / $ppnused)
    else
        echo "WARNING: ns (node size) is missing, it should be used together with ppn!"
        tasks_per_node=${logical:-1}
    fi

    # When both +ln and +ns were given they have to agree.
    if [ -n "$logical" ] && [ -n "$nodesize" ] && [ $logical -ne $tasks_per_node ]; then
        printf '\n%s\n\n' "Both +ln and +ns used, but they do not meet the rule <+ln> = <+ns> / <+ppn>"
        exit
    fi

    # Node count is total_tasks / tasks_per_node, rounded up.
    nodes=$(expr $total_tasks / $tasks_per_node)
    if [ $(expr $nodes '*' $tasks_per_node) -ne $total_tasks ]; then
      nodes=$(expr $nodes + 1)
    fi
    # Never request more tasks per node than there are tasks.
    if [ $tasks_per_node -gt $total_tasks ]; then
      tasks_per_node=$total_tasks
    fi

    nodes="#@ node =$nodes"
    tasks_per_node="#@ tasks_per_node = $tasks_per_node"
    total_tasks=""
fi

# Generate the LoadLeveler job command file "$filename" from the computed
# layout, the user's template "$llfile" and built-in defaults. All paths are
# quoted so that templates or working directories containing whitespace work.

# ll_default KEYWORD VALUE
# Print the directive "#@ KEYWORD = VALUE" unless the template already
# mentions KEYWORD, so that the user's own setting is the only one emitted.
ll_default() {
    grep -q "$1" "$llfile" || printf '#@ %s = %s\n' "$1" "$2"
}

# The job is named after the executable unless +name was given.
if [ -z "$jobname" ]; then
    jobname=$executable
fi

{
    cat <<EOF
# System settings
#@ node_usage = $node_usage
#@ network.LAPI = csss,,US
$total_tasks
$nodes
$tasks_per_node

# User settings
EOF

    # Copy the user's "#@" directives, except lines mentioning node, network,
    # tasks or queue: those are generated by this script.
    grep -E -v "node|network|tasks|queue" "$llfile" | grep -E "#@"

    printf '\n# Default Settings\n'
    ll_default job_type parallel
    ll_default job_name "charmrun_$jobname.$$"
    ll_default wall_clock_limit 0:10:00
    ll_default notification never
    # Output and error files are only defaulted for batch submission.
    # ($interactive is left unquoted so a space-padded count still works.)
    if [ $interactive -ne 0 ]; then
        ll_default output "output_$jobname.$$"
        ll_default error "error_$jobname.$$"
    fi

    # NOTE(review): the system-defined variables below are written as plain
    # assignments without "export"; confirm that poe actually sees them.
    cat <<'EOF'
#@ queue

# System defined exports
MP_MSG_API=lapi
LAPI_USE_SHM=yes
MP_INTRDELAY=100
MP_EAGER_LIMIT=65536
MP_SHARED_MEMORY=yes
MP_USE_BULK_XFER=yes
MEMORY_AFFINITY=MCM
MP_TASK_AFFINITY=MCM
MP_EUILIB=us

# User defined exports
EOF

    # user exports: non-directive template lines that mention "export"
    grep -E -v "#@" "$llfile" | grep "export"
} > "$filename"

# Resolve the program to run: if it is not executable as given, retry with
# the current directory prepended before giving up.
if [ ! -x "$executable" ]; then
    # A shell assignment must not have spaces around "="; with spaces the
    # line runs a command called "executable" and the fallback never happens.
    executable="$PWD/$executable"
fi
if [ ! -x "$executable" ]; then
    echo "Executable not found: $executable"
    # Do not leave the generated job command file behind, and report the
    # failure to the caller through the exit status.
    if [ -n "$filename" ]; then
        rm -f "$filename"
    fi
    exit 1
fi

# Launch the job: directly through poe when the template has no queue
# statement, otherwise by submitting the generated file with llsubmit.
# The generated job command file is removed on every way out of this section.
if [ $interactive -eq 0 ]; then
    echo "Running interactively> poe $executable $args -llfile $filename"
    while :; do
        # $args is deliberately unquoted: it is a space-separated word list.
        poe "$executable" $args -llfile "$filename"
        status=$?
        # Exit status 255 triggers a retry; presumably it means poe could
        # not obtain the requested resources - TODO confirm.
        [ $status -eq 255 ] || break
        echo "Retrying in two minutes..."
        sleep 120
    done
    if [ $status -ne 0 ]; then
        # Clean up before passing the failure on to the caller.
        rm -f "$filename"
        exit $status
    fi
else
    # append the command at the end of the file and llsubmit it
    echo "Submitting batch> poe $executable $args"
    cat >> "$filename" <<EOF

poe $executable $args
EOF
    llsubmit "$filename"
fi

rm "$filename"
